#############################################################
# Replication Code For Berz & Jankowski 2022 Party Politics # 
# Local Preferences in Candidate Selection                  #
#############################################################

# Load some helper functions

source("plot_amce.R")
source("read_qualtrics.R")

# load required packages

library(tidyverse)
library(cregg)
library(cjoint)

# load data in survey format

df <- read.csv("cj_data_prepared.csv")

# identify conjoint variables

cj_vars <- colnames(df)[grepl("cj_[a-z]{2}[0-9]", colnames(df))]

# Read in data for conjoint analysis (takes approx. 1min.)

cj_data <- read_qualtrics(df_object = df, 
                          responses = cj_vars, 
                          respondentID = "ResponseId")

cj_save <- cj_data

# N OBS

nrow(cj_data)/20

# Adjust colnames

colnames(cj_data)[1:4] <- c("response_id", 
                             "qtx_id", 
                             "task", 
                             "profile")

names(cj_data) <- rvest::repair_encoding(names(cj_data))

cj_data <- rename(cj_data, 
                 lokalgremien = "Arbeitet regelm��ig in lokalen Parteigremien",
                 erfahrung = "Erfahrung in der Lokalpolitik",
                 wahlkreis = "Wohnt im Wahlkreis seit", 
                 abweichung = "Stellt eigene �berzeugung vor Position der Partei",
                 geschlecht = "Geschlecht", 
                 bildung = "Bildung",
                 alter = "Alter")

df_survey <- readRDS("df_survey.rds")

cj_data <- merge(cj_data, 
                df_survey, 
                by = "qtx_id", 
                all.x = TRUE)

# Identify responses for Bundestag and Landtag

cj_data$level <- "Bundestag"
cj_data$level[cj_data$task %in% 6:10] <- "Landtag"

# Adjust Factor Levels

cj_data$lokalgremien <- factor(cj_data$lokalgremien, 
                               levels = c("Nein", "Ja"))
# Eng transl
levels(cj_data$lokalgremien) <- c("No", "Yes")

cj_data$geschlecht <- factor(cj_data$geschlecht, 
                             levels = c("Männlich", "Weiblich"))
# Eng transl
levels(cj_data$geschlecht) <- c("Male", "Female")


cj_data$bildung <- factor(cj_data$bildung, 
                   levels = c("Hauptschule + Berufsausbildung", 
                             "Mittlere Reife + Berufsausbildung",
                             "Abitur + Berufsausbildung",
                             "Abitur + Studium"))

# Eng transl
levels(cj_data$bildung) <- c("Low", 
                                     "Moderate",
                                     "High",
                                     "Very high")

cj_data$wahlkreis <- factor(cj_data$wahlkreis, 
                           levels = c("der Geburt", "15 Jahren", "8 Jahren", "2 Jahren"))
# Eng transl
levels(cj_data$wahlkreis) <- c("since birth", "15 years", "8 years", "2 years")

cj_data$erfahrung <- factor(cj_data$erfahrung, 
                          levels = c("Keine", "1 Jahr", "4 Jahre", "7 Jahre"))
# Eng transl
levels(cj_data$erfahrung) <- c("None", "1 year", "4 years", "7 years")


cj_data$abweichung <- factor(cj_data$abweichung, 
                             levels = c("nie", "selten", "gelegentlich", "häufig"))

# Eng transl
levels(cj_data$abweichung) <- c("Never", "Rarely", "Occasionally", "Frequently")

# Eng transl age
levels(cj_data$alter) <- c("23 years", "31 years", "39 years", "46 years", "57 years")


# Identify first framing

cj_data$first_framing <- as.numeric(cj_data$LTFIRST == 1 & cj_data$task >= 6 |
                                      cj_data$BTFIRST == 1 & cj_data$task <= 5)

#####################################################
# Analyse
#####################################################

cj_data <- cj_data %>% 
  arrange(response_id, task, profile)

cj_data$level_bund <- as.numeric(cj_data$level == "Bundestag")

# Marginal Means

mm_all <- cj(cj_data, 
             selected ~ geschlecht + alter + bildung + wahlkreis + erfahrung + lokalgremien + abweichung,
             id = ~ response_id, 
             estimate = "mm",
             feature_labels = list(geschlecht = "Gender",
                                   alter = "Age",
                                   bildung = "Education",
                                   erfahrung = "Experience in\nlocal politics...",
                                   wahlkreis = "Residence in\ndistrict since...",
                                   lokalgremien = "Engagement in\nlocal party branch...",
                                   abweichung = "Deviates from\nparty line..."))

# Main Plot (FIGURE 2 in MAIN PAPER)

ggplot(mm_all, 
       aes(x = level, 
           y = estimate,
           color = feature)) +
  coord_flip() +
  geom_hline(yintercept = 0.5, lty = 2) +
  geom_point(size = 1.5, 
             position = position_dodge(width = 0.5)) +
  geom_errorbar(aes(ymax = lower, 
                    ymin = upper, 
                    width = 0),
                position = position_dodge(width = 0.5)) +
  theme_bw() +
  facet_grid(feature ~ ., 
             scales  ="free", 
             space = "free",
             switch = "y") +
  scale_x_discrete(position="top") +
  theme(plot.background = element_rect(fill = 'white'),
        axis.text.y = element_text(size=10.5, 
                                   color = "black"),
        axis.text.x = element_text(angle = 0,
                                   size=10.5, 
                                   color = "black"),
        strip.text.y = element_text(angle = 90,
                                    color = "black",
                                    size = 10.5),
        strip.text.y.left = element_text(angle = 0),
        strip.background = element_rect(fill = 'white')) +
  theme(plot.margin=unit(c(1,1,1,3),
                         "line"),
        panel.border = element_rect(colour = "black"),
        legend.position = "bottom") +
  xlab("") +
  ylab("Marginal Means") +
  guides(color = FALSE) + 
  scale_color_manual(values = viridis::inferno(8))

ggsave(file = "all_mm_eng.pdf", width = 8, height = 8)

###################################################
# Interaction by Deviation (Figure 3 and 4)
###################################################

ols <- lm(selected ~ geschlecht + 
            alter + 
            bildung + 
            wahlkreis + 
            erfahrung + 
            lokalgremien + 
            abweichung*sat_loc_bi +
            abweichung*sat_nat_bi,
          data = cj_data)

## MM PARTY

predLoc <- prediction::prediction(ols,
                                  at = list(abweichung = unique(cj_data$abweichung),
                                            sat_loc_bi = unique(cj_data$sat_loc_bi))) %>%
  summary() %>% as.data.frame()


names(predLoc)[1:2] <- c("abweichung", "sat_loc_bi")

predLoc$Level <- "Satisfaction with party at local level"

predLoc$Estimate <- "Marginal Means"

predNat <- prediction::prediction(ols,
                                  at = list(abweichung = unique(cj_data$abweichung),
                                            sat_nat_bi = unique(cj_data$sat_nat_bi))) %>%
  summary() %>% as.data.frame()

names(predNat)[1:2] <- c("abweichung", "sat_nat_bi")

predNat$Level <- "Satisfaction with party at national level"

predNat$Estimate <- "Marginal Means"

# DIFF MM LOC

diffMMloc <- margins::margins(ols,
                              var = "sat_loc_bi",
                              at = list(abweichung = unique(cj_data$abweichung))) %>%
  summary() %>% as.data.frame()

diffMMloc$Level <- "Satisfaction with party at local level"

diffMMloc$sat_loc_bi <- "unsatisfied"

colnames(diffMMloc)[3] <- c("Prediction")

diffMMloc$Estimate <- "Diff. in Marginal Means"

# DIFF MM NAT

diffMMnat <- margins::margins(ols,
                              var = "sat_nat_bi",
                              at = list(abweichung = unique(cj_data$abweichung))) %>%
  summary() %>% as.data.frame()

diffMMnat$Level <- "Satisfaction with party at national level"

diffMMnat$sat_nat_bi <- "unsatisfied"

colnames(diffMMnat)[3] <- c("Prediction")

diffMMnat$Estimate <- "Diff. in Marginal Means"

combined_loc <- bind_rows(predLoc, diffMMloc)
combined_nat <- bind_rows(predNat, diffMMnat)

combined_loc$Estimate <- factor(combined_loc$Estimate,
                                levels = c("Marginal Means",
                                           "Diff. in Marginal Means"))

combined_nat$Estimate <- factor(combined_nat$Estimate,
                                levels = c("Marginal Means",
                                           "Diff. in Marginal Means"))

dflines <- data.frame(linepos = c(0.5, 0), Estimate = c("Marginal Means",
                                                        "Diff. in Marginal Means"))

dflines$Estimate <- factor(dflines$Estimate,
                           levels = c("Marginal Means",
                                      "Diff. in Marginal Means"))

ggplot(combined_loc, aes(x = abweichung, 
                         y = Prediction,
                         color = sat_loc_bi,
                         ymin = lower, 
                         ymax = upper,
                         shape = sat_loc_bi)) +
  geom_hline(data = dflines, aes(yintercept = linepos), lty = 2) +
  geom_pointrange(position = position_dodge(0.4)) +
  facet_wrap(~ Estimate, scales = "free_x") +
  coord_flip() +
  theme_bw() +
  xlab("Deviation from party line...") +
  ylab("Estimate") +
  theme(legend.position = "bottom") +
  scale_color_manual("Satisfaction with party\nat local level", values = c("blue", "orange")) +
  scale_shape_manual("Satisfaction with party\nat local level", values = c(15:16))

ggsave(file = "local_int.pdf", width = 7, height = 4)

ggplot(combined_nat, aes(x = abweichung, y = Prediction,
                         color = sat_nat_bi,
                         ymin = lower, 
                         ymax = upper,
                         shape = sat_nat_bi)) +
  geom_hline(data = dflines, aes(yintercept = linepos), lty = 2) +
  geom_pointrange(position = position_dodge(0.4)) +
  facet_wrap(~ Estimate, scales = "free_x") +
  coord_flip() +
  theme_bw() +
  xlab("Deviation from party line...") +
  ylab("Estimate") +
  theme(legend.position = "bottom") +
  scale_color_manual("Satisfaction with party\nat national level", values = c("blue", "orange")) +
  scale_shape_manual("Satisfaction with party\nat national level", values = c(15:16))

ggsave(file = "national_int.pdf", width = 7, height = 4)

###################################################
# Interaction Female and Party (FIGURE 5 and 6)
###################################################

cj_data$Female <- as.factor(ifelse(cj_data$female == 1, 
                                   "Female",
                                   "Male"))

cj_data$Female <- relevel(cj_data$Female, ref = "Male")

cj_data$party[cj_data$party == "Die Linke"] <- "Left Party"

cj_data$Party <- as.factor(cj_data$party)
cj_data$Party <- factor(cj_data$Party, levels = c("AfD",
                                                  "CDU/CSU",
                                                  "Left Party",
                                                  "FDP",
                                                  "SPD",
                                                  "Greens"))

ols <- lm(selected ~ abweichung + 
            alter + 
            bildung + 
            wahlkreis + 
            erfahrung + 
            lokalgremien + 
            geschlecht*Party +
            geschlecht*Female,
          data = cj_data)

## MM PARTY

predParty <- prediction::prediction(ols,
                                at = list(geschlecht = unique(cj_data$geschlecht),
                                          Party = unique(cj_data$Party))) %>%
  summary() %>% as.data.frame()


names(predParty)[1:2] <- c("geschlecht", "Party")

predParty$Level <- "Party preference of respondent"

predParty$Estimate <- "Marginal Means"

# MM FEMALE

predsFemale <- prediction::prediction(ols,
                                 at = list(geschlecht = unique(cj_data$geschlecht),
                                           Female = unique(cj_data$Female))) %>%
  summary() %>% as.data.frame()


names(predsFemale)[1:2] <- c("geschlecht", "Gender")

predsFemale$Level <- "Gender of respondent"

predsFemale$Estimate <- "Marginal Means"

# DIFF MM FEMALE

diffMMfemale <- margins::margins(ols,
                                 var = "Female",
                                 at = list(geschlecht = unique(cj_data$geschlecht))) %>%
  summary() %>% as.data.frame()

diffMMfemale$Level <- "Gender of respondent"

diffMMfemale$Gender <- "Female"

colnames(diffMMfemale)[3] <- c("Prediction")

diffMMfemale$Estimate <- "Diff. in Marginal Means"

# DIFF MM PARTY

diffMMparty <- margins::margins(ols,
                              var = "Party",
                              at = list(geschlecht = unique(cj_data$geschlecht))) %>%
  summary() %>% as.data.frame()

diffMMparty$Level <- "Party preference of respondent"

diffMMparty$Party <- gsub("^Party","",diffMMparty$factor)

colnames(diffMMparty)[3] <- c("Prediction")

diffMMparty$Estimate <- "Diff. in Marginal Means"

combined_female <- bind_rows(predsFemale, diffMMfemale)

combined_female$Estimate <- factor(combined_female$Estimate,
                                   levels = c("Marginal Means",
                                              "Diff. in Marginal Means"))

dflines <- data.frame(linepos = c(0.5, 0), Estimate = c("Marginal Means",
                        "Diff. in Marginal Means"))

dflines$Estimate <- factor(dflines$Estimate,
                                   levels = c("Marginal Means",
                                              "Diff. in Marginal Means"))

ggplot(combined_female, aes(x = geschlecht, y = Prediction,
                            color = Gender, shape = Gender,
                            ymin = lower, ymax = upper)) +
  geom_hline(data = dflines, aes(yintercept = linepos), lty = 2) +
  geom_pointrange(position = position_dodge(0.4)) +
  facet_wrap(~ Estimate, scales = "free_x") +
  coord_flip() +
  theme_bw() +
  xlab("Gender of Candidate") +
  ylab("Estimate") +
  theme(legend.position = "bottom") +
  scale_color_manual("Gender of respondent", values = c("blue", "orange")) +
  scale_shape_manual("Gender of respondent", values = c(15:16))

ggsave("female_gender_int.pdf", width = 6, height = 3)

combined_party <- bind_rows(predParty, diffMMparty)

combined_party$Estimate <- factor(combined_party$Estimate,
                                   levels = c("Marginal Means",
                                              "Diff. in Marginal Means"))

dflines <- data.frame(linepos = c(0.5, 0), Estimate = c("Marginal Means",
                                                        "Diff. in Marginal Means"))

dflines$Estimate <- factor(dflines$Estimate,
                           levels = c("Marginal Means",
                                      "Diff. in Marginal Means"))

combined_party$Party <- factor(combined_party$Party, levels = c("AfD",
                                                  "CDU/CSU",
                                                  "Left Party",
                                                  "FDP",
                                                  "SPD",
                                                  "Greens"))

ggplot(combined_party, aes(x = geschlecht, y = Prediction,
                            color = Party,
                            shape = Party,
                            ymin = lower, ymax = upper)) +
  geom_hline(data = dflines, aes(yintercept = linepos), lty = 2) +
  geom_errorbar(position = position_dodge(0.4), width = 0) +
  geom_point(position = position_dodge(0.4), size = 2) +
  facet_wrap(~ Estimate, scales = "free_x") +
  coord_flip() +
  theme_bw() +
  xlab("Gender of Candidate") +
  ylab("Estimate") +
  theme(legend.position = "bottom") +
  scale_shape_manual("Party\n(GAL-TAN Ordered)", values = c(15,16,17,0:2)) +
  scale_color_manual("Party\n(GAL-TAN Ordered)", values = c("blue", "black", "purple", "orange", "red", "darkgreen"))

ggsave("female_party_int.pdf", width = 6, height = 4)

####################################################
# Age (FIGURE A1 in suppl. mat.)
####################################################

cj_data$age <- as.numeric(cj_data$age)

cj_data$age_resp <- ifelse(cj_data$age <= 30, "Age <= 30",
                           ifelse(cj_data$age > 30 & cj_data$age < 60,
                                  "30 < Age < 60",
                                  "Age >= 60")) %>%
  as.factor()


cj_data$age_resp <- relevel(cj_data$age_resp, ref = "Age <= 30")

mm_by_sat <- cj(cj_data, 
                selected ~ geschlecht + alter + bildung + wahlkreis + erfahrung + lokalgremien + abweichung,
                id = ~ response_id, 
                by = ~ age_resp,
                estimate = "mm",
                feature_labels = list(geschlecht = "Gender",
                                      alter = "Age",
                                      bildung = "Education",
                                      erfahrung = "Experience in\nlocal politics...",
                                      wahlkreis = "Residence in\ndistrict since...",
                                      lokalgremien = "Engagement in\nlocal party branch...",
                                      abweichung = "Deviates from\nparty line..."))

# Only keep relevant feature

mm_by_sat <- filter(mm_by_sat, 
                    feature == "Age")

# Prepare Variables for Plot

mm_by_sat$Respondent <- mm_by_sat$age_resp

mm_by_sat$Respondent <- relevel(as.factor(mm_by_sat$Respondent), ref = "Age <= 30")

# Plot results

ggplot(mm_by_sat, 
       aes(x = level, 
           y = estimate)) +
  coord_flip() +
  geom_hline(aes(yintercept = 0.5), 
             colour = "black", 
             linetype = 2, 
             size = .7) +
  geom_point(size = 1.5,
             position = position_dodge(0.3)) +
  geom_errorbar(aes(ymax = lower, 
                    ymin = upper), 
                width = 0,
                position = position_dodge(0.3)) +
  theme_bw() +
  facet_wrap(~ Respondent) +
  ylab("Marginal Mean") +
  xlab("Age of candidate...\n")

ggsave(file = "mm_age.pdf", width = 8, height = 4)

#####################################################
# MM by Level (Fig A2)
#####################################################

cj_data$Bundestag <- as.factor(ifelse(cj_data$level_bund == 1,
                               "National Parliament (Bundestag)",
                               "Regional Parliament (Landtag)"))

mm_by_sat <- cj(cj_data, 
                selected ~ geschlecht + alter + bildung + wahlkreis + erfahrung + lokalgremien + abweichung,
                id = ~ response_id, 
                by = ~ Bundestag,
                estimate = "mm",
                feature_labels = list(geschlecht = "Gender",
                                      alter = "Age",
                                      bildung = "Education",
                                      erfahrung = "Experience in\nlocal politics...",
                                      wahlkreis = "Residence in\ndistrict since...",
                                      lokalgremien = "Engagement in\nlocal party branch...",
                                      abweichung = "Deviates from\nparty line..."))

mm_by_sat_diff <- cj(cj_data, 
                     selected ~ geschlecht + alter + bildung + wahlkreis + erfahrung + lokalgremien + abweichung,
                     id = ~ response_id, 
                     by = ~ Bundestag,
                     estimate = "mm_diff",
                     feature_labels = list(geschlecht = "Gender",
                                           alter = "Age",
                                           bildung = "Education",
                                           erfahrung = "Experience in\nlocal politics...",
                                           wahlkreis = "Residence in\ndistrict since...",
                                           lokalgremien = "Engagement in\nlocal party branch...",
                                           abweichung = "Deviates from\nparty line..."))

# Prepare Variables for Plot

mm_by_sat$Respondent <- mm_by_sat$Bundestag
mm_by_sat_diff$Respondent <- mm_by_sat_diff$Bundestag

mm_by_sat$Estimate <- "Marginal Means"
mm_by_sat_diff$Estimate <- "Difference in Marginal Means"

mm_by_sat$hline <- 0.5
mm_by_sat_diff$hline <- 0

# Combine MM and Diff_MM

df_sat <- rbind(mm_by_sat, mm_by_sat_diff)

# Order Estimate Indicator

df_sat$Estimate <- factor(df_sat$Estimate, 
                          levels = c("Marginal Means",
                                     "Difference in Marginal Means"))

# Order group of respondents

df_sat$Respondent <- factor(df_sat$Respondent,
                            levels = c("National Parliament (Bundestag)",
                                       "Regional Parliament (Landtag)"))

# Plot results

# Main Plot

ggplot(df_sat, 
       aes(x = level, 
           y = estimate,
           group = Respondent, 
           color = Respondent)) +
  coord_flip() +
  geom_hline(aes(yintercept = hline), 
             colour = "black", 
             linetype = 2, 
             size = .7) +
  geom_point(size = 1.5, 
             position = position_dodge(width = 0.5)) +
  geom_errorbar(aes(ymax = lower, 
                     ymin = upper, 
                     width = 0),
                position = position_dodge(width = 0.5)) +
  theme_bw() +
  facet_grid(feature ~ Estimate, 
             scales  ="free", 
             space = "free",
             switch = "y") +
  scale_x_discrete(position="top") +
  theme(plot.background = element_rect(fill = 'white'),
        axis.text.y = element_text(size=10.5, 
                                   color = "black"),
        axis.text.x = element_text(angle = 0,
                                   size=10.5, 
                                   color = "black"),
        strip.text.y = element_text(angle = 90,
                                    color = "black",
                                    size = 10.5),
        strip.text.y.left = element_text(angle = 0),
        strip.background = element_rect(fill = 'white')) +
  theme(plot.margin=unit(c(1,1,1,3),
                         "line"),
        panel.border = element_rect(colour = "black")) +
  xlab("") +
  ylab("Marginal Means") +
  theme(legend.position = "bottom") +
  scale_color_manual("Framing", values = c("grey50", "black"))

ggsave(file = "mm_level_eng.pdf", width = 10, height = 8)

#####################################################
# MM by Carry-Over (Fig A4)
#####################################################

cj_data$task <- as.factor(cj_data$task)

cj_anova(cj_data, 
   selected ~ geschlecht + alter + bildung + wahlkreis + erfahrung + lokalgremien + abweichung,
   by = ~ task)

mm_by_sat <- cj(cj_data, 
                selected ~ geschlecht + alter + bildung + wahlkreis + erfahrung + lokalgremien + abweichung,
                id = ~ response_id, 
                by = ~ task,
                estimate = "mm",
                feature_labels = list(geschlecht = "Gender",
                                      alter = "Age",
                                      bildung = "Education",
                                      erfahrung = "Experience in\nlocal politics...",
                                      wahlkreis = "Residence in\ndistrict since...",
                                      lokalgremien = "Engagement in\nlocal party branch...",
                                      abweichung = "Deviates from\nparty line..."))

color_values <- viridis::magma(12)

plot(mm_by_sat, group = "task") +
  scale_color_manual(values = color_values) +
  guides(color = F)

ggsave(file = "mm_carryover_eng.pdf", width = 10, height = 16)

########################################################################
# Satisficing: How frequently 1st or 2nd profile selected? (FIG A5) ####
########################################################################

profile_tab <- table(cj_data$profile[cj_data$selected == 1])

profile_tab

prop.table(profile_tab)

N_first <- profile_tab[1]

N <- sum(profile_tab)

n_range <- (N/2 - 100):(N/2 + 100)

probs <- pbinom(n_range, 
                size = N, 
                prob = 0.5)



test_for_sat <- function(x=1){
  
  profile_tab <- table(cj_data$profile[cj_data$selected == 1 & cj_data$task == x])
  
  prop <- profile_tab[1]/sum(profile_tab)  
  
  N_first <- profile_tab[1]
  
  N <- sum(profile_tab)
  
  pval <- pbinom(N_first, size = N, prob = 0.5)
  
  data.frame(prop, pval)
  
}

props <- map_df(1:10, test_for_sat)

order_eff_plot <- ggplot(data = NULL, aes(x = n_range, y = probs)) +
  geom_vline(aes(xintercept = N_first), 
             lty = 2, 
             color = "red") +
  geom_hline(aes(yintercept = pbinom(N_first, N, 0.5)),
             lty = 2, 
             color = "red") +
  geom_line(size = 1) +
  xlab("Number of 'successes'") +
  ylab("Cumulative binomial distribution\n(under p('success') = 0.5)") +
  theme_bw()

order_eff_plot

ggsave(order_eff_plot, 
       width = 10, 
       height = 5.5, 
       filename = "order_eff_plot.pdf")



## End of file ##
#################
